Here is Homework 11. The data I will be using is Julian Days to bud flush for different genotypes of Balsam Poplar across a variety of lengths of chilling.
# Preliminaries
library(ggplot2)
library(ggthemes)
## Warning: package 'ggthemes' was built under R version 3.4.4
# First I need to read in my dataset (I am going to copy in some code from my own code that will bring in a file and alter it into usable data)
setwd("~/Documents/UVM_2018/BIO381")
#+++++++++++++++++++++++++
# Function to calculate the mean and the standard deviation
# for each group
#+++++++++++++++++++++++++
# data : a data frame
# varname : the name of a column containing the variable
#to be summariezed
# groupnames : vector of column names to be used as
# grouping variables
data_summary <- function(data, varname, groupnames){
require(plyr)
summary_func <- function(x, col){
c(mean = mean(x[[col]], na.rm=TRUE),
sd = sd(x[[col]], na.rm=TRUE))
}
data_sum<-ddply(data, groupnames, .fun=summary_func,
varname)
data_sum <- rename(data_sum, c("mean" = varname))
return(data_sum)
}
chill <- read.csv("ChillingExpt.csv", header=T)
str(chill)
## 'data.frame': 40 obs. of 13 variables:
## $ ind_code: Factor w/ 10 levels "CPL_03","CPL_10",..: 3 3 10 10 5 5 2 2 8 8 ...
## $ SampleID: num 1 1 2 2 3 3 4 4 5 5 ...
## $ JulBF_0 : int 32 35 32 30 34 36 39 38 56 47 ...
## $ JulBF_1 : int NA 33 33 35 41 40 40 47 56 54 ...
## $ JulBF_2 : int 44 44 40 41 45 41 46 48 56 56 ...
## $ JulBF_3 : int NA 54 45 45 48 48 59 53 60 57 ...
## $ JulBF_4 : int 54 62 47 50 58 53 58 57 NA 73 ...
## $ JulBF_5 : int 60 64 55 53 62 65 60 63 81 73 ...
## $ JulBF_6 : int 68 77 62 61 71 71 78 85 82 NA ...
## $ JulBF_7 : int 82 84 69 69 74 74 76 80 87 84 ...
## $ JulBF_8 : int NA NA 76 74 81 85 82 83 87 87 ...
## $ JulBF_9 : int NA NA 82 78 87 90 88 88 94 91 ...
## $ JulBF_10: int NA 87 84 84 94 94 91 90 NA 94 ...
subtractor <- c(0,0,5,12,19,26,33,40,47,54,61,68,75)
# create a for loop that will subract the correct value from each column
for (i in 3:13) {
chill[,i] <- chill[,i] - subtractor[i]
}
summary(chill)
## ind_code SampleID JulBF_0 JulBF_1
## CPL_03 : 4 Min. : 1.00 Min. :20.00 Min. :13.00
## CPL_10 : 4 1st Qu.: 5.15 1st Qu.:26.00 1st Qu.:22.00
## FNO_12 : 4 Median :11.00 Median :29.50 Median :26.00
## FNO_15 : 4 Mean :10.72 Mean :31.06 Mean :28.38
## HWK_11 : 4 3rd Qu.:16.25 3rd Qu.:35.50 3rd Qu.:33.50
## HWK_14 : 4 Max. :20.00 Max. :51.00 Max. :68.00
## (Other):16 NA's :4 NA's :1
## JulBF_2 JulBF_3 JulBF_4 JulBF_5
## Min. :13.00 Min. :12.00 Min. :12.00 Min. :11.00
## 1st Qu.:22.00 1st Qu.:20.00 1st Qu.:18.00 1st Qu.:16.00
## Median :26.00 Median :23.00 Median :24.00 Median :22.00
## Mean :27.63 Mean :24.21 Mean :23.31 Mean :22.35
## 3rd Qu.:33.50 3rd Qu.:28.00 3rd Qu.:29.00 3rd Qu.:27.00
## Max. :54.00 Max. :34.00 Max. :40.00 Max. :41.00
## NA's :5 NA's :7 NA's :1 NA's :3
## JulBF_6 JulBF_7 JulBF_8 JulBF_9
## Min. :13.00 Min. :11.00 Min. : 8.00 Min. : 8.00
## 1st Qu.:19.00 1st Qu.:15.00 1st Qu.:14.00 1st Qu.:15.00
## Median :26.00 Median :20.00 Median :19.00 Median :20.00
## Mean :25.89 Mean :21.06 Mean :18.54 Mean :18.21
## 3rd Qu.:33.00 3rd Qu.:27.00 3rd Qu.:22.50 3rd Qu.:22.00
## Max. :44.00 Max. :33.00 Max. :29.00 Max. :26.00
## NA's :3 NA's :5 NA's :5 NA's :11
## JulBF_10
## Min. : 9.00
## 1st Qu.:12.00
## Median :15.00
## Mean :14.84
## 3rd Qu.:18.00
## Max. :19.00
## NA's :15
chill2 <- reshape(chill, varying=3:13, sep="_", direction="long")
str(chill2)
## 'data.frame': 440 obs. of 5 variables:
## $ ind_code: Factor w/ 10 levels "CPL_03","CPL_10",..: 3 3 10 10 5 5 2 2 8 8 ...
## $ SampleID: num 1 1 2 2 3 3 4 4 5 5 ...
## $ time : num 0 0 0 0 0 0 0 0 0 0 ...
## $ JulBF : num 27 30 27 25 29 31 34 33 51 42 ...
## $ id : int 1 2 3 4 5 6 7 8 9 10 ...
## - attr(*, "reshapeLong")=List of 4
## ..$ varying:List of 1
## .. ..$ JulBF: chr "JulBF_0" "JulBF_1" "JulBF_2" "JulBF_3" ...
## .. ..- attr(*, "v.names")= chr "JulBF"
## .. ..- attr(*, "times")= num 0 1 2 3 4 5 6 7 8 9 ...
## ..$ v.names: chr "JulBF"
## ..$ idvar : chr "id"
## ..$ timevar: chr "time"
summary(chill2)
## ind_code SampleID time JulBF
## CPL_03 : 44 Min. : 1.00 Min. : 0 Min. : 8.00
## CPL_10 : 44 1st Qu.: 5.15 1st Qu.: 2 1st Qu.:18.00
## FNO_12 : 44 Median :11.00 Median : 5 Median :22.00
## FNO_15 : 44 Mean :10.72 Mean : 5 Mean :23.61
## HWK_11 : 44 3rd Qu.:16.25 3rd Qu.: 8 3rd Qu.:29.00
## HWK_14 : 44 Max. :20.00 Max. :10 Max. :68.00
## (Other):176 NA's :60
## id
## Min. : 1.00
## 1st Qu.:10.75
## Median :20.50
## Mean :20.50
## 3rd Qu.:30.25
## Max. :40.00
##
head(chill2)
## ind_code SampleID time JulBF id
## 1.0 FNO_12 1 0 27 1
## 2.0 FNO_12 1 0 30 2
## 3.0 SKN_10 2 0 27 3
## 4.0 SKN_10 2 0 25 4
## 5.0 HWK_11 3 0 29 5
## 6.0 HWK_11 3 0 31 6
with(chill2, plot(time, JulBF))
# There is a bump in Jul days to BF at about 6 weeks of chilling. We determined that this is most likely due to the malfunctioning growth chamber that occurred a few days after week 6 was planted. We believe the interrupted 24 hour light (by 4 hours of darkness) is giving us this bump.
chill3 <- data_summary(chill2, varname="JulBF",
groupnames=c("time","ind_code"))
## Loading required package: plyr
# chill3 is now in the long format with means and standard deviations
#-----------------------------------------------------------------------
# the code that follows will now be what I do in class for the homework assignment
# Here is a basic plot
p1 <- ggplot(data=chill3, mapping = aes(x=time,y=JulBF)) + geom_point()
print(p1)
# Now I will try adding themes found in ggthemes
p1 + theme_pander()
p1 + theme_hc()
p1 + theme_calc()
p1 + theme_stata()
p1 + theme_tufte()
p1 + theme_wsj()
# ooh i like that one...
p1 + theme_wsj(base_size = 10,base_family = "serif")
# Now I want to add the individuals to my plot
p2 <- ggplot(data=chill3,mapping=aes(x=time,y=JulBF,col=ind_code)) + geom_point() + theme_wsj(base_size = 10,base_family = "serif")
print(p2)
# now add a smoothing line
p2 + geom_smooth()
## `geom_smooth()` using method = 'loess'
# remove confidence intervals
p2 + geom_smooth(se=FALSE)
## `geom_smooth()` using method = 'loess'
p2 + geom_smooth(se = FALSE, method = "glm")
p2 + geom_smooth(se = FALSE, method = "gam")
# The following is my graph up to now and I will try playing around with faceting
p3 <- ggplot(data=chill3,mapping=aes(x=time,y=JulBF, color=ind_code)) + geom_point() + theme_wsj(base_size = 10,base_family = "serif") + geom_smooth(se = FALSE)
print(p3)
## `geom_smooth()` using method = 'loess'
p3 + facet_grid(ind_code~.)
## `geom_smooth()` using method = 'loess'
p3 + facet_grid(.~ind_code)
## `geom_smooth()` using method = 'loess'
p3 + facet_grid(time~ind_code)
## `geom_smooth()` using method = 'loess'
p3 + facet_grid(ind_code~time)
## `geom_smooth()` using method = 'loess'
p3 + facet_grid(time~.)
## `geom_smooth()` using method = 'loess'
p3 + facet_grid(.~time)
## `geom_smooth()` using method = 'loess'
# faceting doesn't seem to work well with my data
# Now I will try doing a little more and see if I can add error bars to my points with the standard deviation in my data frame
p4 <- ggplot(chill3, aes(x=time, y=JulBF, color=ind_code)) +
geom_line() +
geom_point() +
theme_wsj(base_size = 10,base_family = "serif") +
geom_errorbar(aes(ymin=JulBF-sd, ymax=JulBF+sd), width=.2,
position=position_dodge(0.05))
print(p4)
## Warning: Removed 5 rows containing missing values (geom_errorbar).
p4 <- ggplot(chill3, aes(x=time, y=JulBF, color=ind_code)) +
geom_line() +
geom_point() +
theme_wsj(base_size = 10,base_family = "serif") +
geom_errorbar(aes(ymin=JulBF-sd, ymax=JulBF+sd), width=.2,
position=position_dodge(0.05)) +
facet_grid(ind_code~., scales = "free_y")
print(p4)
## Warning: Removed 5 rows containing missing values (geom_errorbar).
p5 <- ggplot(chill3, aes(x=time, y=JulBF, color=ind_code)) +
geom_line() +
geom_point() +
theme_wsj(base_size = 10,base_family = "serif") +
geom_errorbar(aes(ymin=JulBF-sd, ymax=JulBF+sd), width=.2,
position=position_dodge(0.05)) +
facet_grid(.~ind_code, scales = "free_y")
print(p5)
## Warning: Removed 5 rows containing missing values (geom_errorbar).